/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.compaction.mapreduce.avro;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonNode;

import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ObjectNode;

import lombok.extern.slf4j.Slf4j;


/**
 * An {@link AvroDeltaFieldNameProvider} implementation that derives the delta field names of a record
 * from a per-field schema attribute.
 *
 * <p>The name of the attribute is read from config key {@link #ATTRIBUTE_FIELD} (required). For every
 * field of the record schema, the attribute value is interpreted as a JSON object (either a string
 * containing JSON, or an embedded JSON object). A field is a delta field when that object has a
 * property named by config key {@link #DELTA_PROP_NAME} (default {@link #DEFAULT_DELTA_PROP_NAME})
 * whose value is {@code true}. Fields that lack the attribute, or whose attribute is not a JSON
 * object, are not delta fields.
 *
 * <p>Results are cached per {@link Schema} (at most 100 schemas).
 */
@Slf4j
public class FieldAttributeBasedDeltaFieldsProvider implements AvroDeltaFieldNameProvider {
  public static final String ATTRIBUTE_FIELD =
      "gobblin.compaction." + FieldAttributeBasedDeltaFieldsProvider.class.getSimpleName() + ".deltaAttributeField";
  public static final String DELTA_PROP_NAME =
      "gobblin.compaction." + FieldAttributeBasedDeltaFieldsProvider.class.getSimpleName() + ".deltaPropName";
  public static final String DEFAULT_DELTA_PROP_NAME = "delta";

  // Shared so that a new factory/mapper pair is not built for every schema field.
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  private final String attributeField;
  private final String deltaPropName;
  private final LoadingCache<Schema, List<String>> recordSchemaToDeltaSchemaCache;

  /**
   * @param conf configuration; must contain {@link #ATTRIBUTE_FIELD}, may contain {@link #DELTA_PROP_NAME}
   * @throws IllegalArgumentException if {@link #ATTRIBUTE_FIELD} is not set
   */
  public FieldAttributeBasedDeltaFieldsProvider (Configuration conf) {
    this.attributeField = conf.get(ATTRIBUTE_FIELD);
    Preconditions.checkArgument(attributeField != null, "Missing config " + ATTRIBUTE_FIELD);
    this.deltaPropName = conf.get(DELTA_PROP_NAME, DEFAULT_DELTA_PROP_NAME);
    this.recordSchemaToDeltaSchemaCache =
        CacheBuilder.newBuilder().maximumSize(100).build(new CacheLoader<Schema, List<String>>() {
          @Override
          public List<String> load(Schema schema) throws Exception {
            return getDeltaFieldNamesForNewSchema(schema);
          }
        });
  }

  /**
   * Returns the names of the delta fields for the schema of the given record, computing them on
   * first use of a schema and serving them from the cache afterwards.
   *
   * @param record record whose schema is inspected
   * @return names of the fields flagged as delta fields, in schema order
   * @throws RuntimeException wrapping the {@link ExecutionException} if the cache load fails
   */
  @Override
  public List<String> getDeltaFieldNames(GenericRecord record) {
    try {
      return recordSchemaToDeltaSchemaCache.get(record.getSchema());
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Scans every field of {@code originalSchema} and collects those whose attribute marks them as delta.
   */
  private List<String> getDeltaFieldNamesForNewSchema(Schema originalSchema) {
    List<String> deltaFields = new ArrayList<>();
    for (Field field : originalSchema.getFields()) {
      // getJsonProp returns null for fields that do not carry the attribute; such fields are
      // simply not delta fields rather than an error.
      JsonNode attribute = field.getJsonProp(this.attributeField);
      if (attribute == null) {
        continue;
      }
      ObjectNode objectNode = toObjectNode(attribute);
      if (objectNode == null) {
        continue;
      }
      JsonNode deltaValue = objectNode.get(this.deltaPropName);
      if (deltaValue == null) {
        continue;
      }
      // getValueAsText (unlike toString) yields the unquoted text, so both the boolean true
      // and the string "true" are recognised. Container nodes yield null, which parses as false.
      if (Boolean.parseBoolean(deltaValue.getValueAsText())) {
        deltaFields.add(field.name());
      }
    }
    log.info("Will use delta fields: {}", deltaFields);
    return deltaFields;
  }

  /**
   * Converts an attribute value into the JSON object it describes.
   *
   * @return the attribute itself when it already is a JSON object, the parsed object when it is a
   *         value node holding JSON text, or {@code null} when neither applies
   */
  private ObjectNode toObjectNode(JsonNode attribute) {
    if (attribute instanceof ObjectNode) {
      return (ObjectNode) attribute;
    }
    return getDeltaPropValue(attribute.getValueAsText());
  }

  /**
   * Parses {@code json} as a JSON object.
   *
   * @return the parsed object, or {@code null} if {@code json} is null, malformed, or not a JSON object
   */
  private ObjectNode getDeltaPropValue(String json) {
    if (json == null) {
      return null;
    }
    try {
      JsonNode parsed = OBJECT_MAPPER.readTree(json);
      // Valid JSON that is not an object (array, scalar, null) cannot carry the delta property.
      return parsed instanceof ObjectNode ? (ObjectNode) parsed : null;
    } catch (IOException e) {
      // Best effort: a malformed attribute means the field is not treated as a delta field.
      log.warn("Ignoring attribute value that is not valid JSON: " + json, e);
      return null;
    }
  }
}